#!/bin/bash
# Runs the e2e assertion scripts against a local kind cluster: parses the flags,
# provisions (or reuses) a cluster, builds and loads the docker images, then
# executes every assertion script in turn, resetting the cluster between runs.
set -euo pipefail

# Epoch seconds at script start; exit_and_fail uses it to report the elapsed time.
START=$(date +%s)
# UTC timestamp that get_nth_worker_pod compares against pod creation times.
# It is exported for child scripts and refreshed before each assertion script.
START_FOR_QUERYING=$(date -u +"%Y-%m-%dT%TZ")
export START_FOR_QUERYING
# Physical directory containing this script; all sibling paths are built from it.
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
# Defaults below can be overridden by the command-line flags (see USAGE).
CLUSTER_NAME_BASE="nth-test"
# "false" makes clean_up delete the cluster on exit; -p switches it to "true".
PRESERVE=false
# Cluster context directory; when empty a new cluster is provisioned.
TMP_DIR=""
# Extra arguments for `docker buildx build` (set by -d).
DOCKER_ARGS=""
# Extra flags forwarded to the provision-cluster / delete-cluster scripts (set by -o).
PROVISION_CLUSTER_ARGS=""
DELETE_CLUSTER_ARGS=""
# Image names used when the images are built locally instead of passed with -n / -w.
DEFAULT_NODE_TERMINATION_HANDLER_DOCKER_IMG="node-termination-handler:customtest"
NODE_TERMINATION_HANDLER_DOCKER_IMG=""
DEFAULT_WEBHOOK_DOCKER_IMG="webhook-test-proxy:customtest"
WEBHOOK_DOCKER_IMG=""
# 0: TMP_DIR is prepended to PATH; 1 (-o): TMP_DIR is appended to PATH instead.
OVERRIDE_PATH=0
K8S_VERSION="1.32"
# amazon-ec2-metadata-mock (AEMM) settings exported to the assertion scripts.
AEMM_URL="amazon-ec2-metadata-mock-service.default.svc.cluster.local"
AEMM_VERSION="1.12.0"
AEMM_DL_URL="https://github.com/aws/amazon-ec2-metadata-mock/releases/download/v$AEMM_VERSION/amazon-ec2-metadata-mock-$AEMM_VERSION.tgz"
# Keeps a WEBHOOK_URL already present in the environment, otherwise uses the in-cluster proxy.
WEBHOOK_URL=${WEBHOOK_URL:="http://webhook-test-proxy.default.svc.cluster.local"}

# Default test set: every file under ../e2e, sorted; -a replaces this list.
ASSERTION_SCRIPTS=$(find "$SCRIPTPATH/../e2e" -type f | sort)
# Scripts in this list are skipped by the main loop (see is_denylisted).
SCRIPT_DENYLIST=(
    "$SCRIPTPATH/../e2e/asg-launch-lifecycle-sqs-test"
)

function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }

function relpath() {
  perl -e 'use File::Spec; print File::Spec->abs2rel(@ARGV) . "\n"' "$1" "$2"
}

# Runs a command until it succeeds, waiting 1, 2, 4, ... seconds between attempts.
# Arguments: $1 - maximum number of attempts; remaining args - the command to run
# Outputs:   one progress line on stdout after every failed attempt
# Returns:   0 as soon as the command succeeds, otherwise the exit status of
#            the last failed attempt once the attempts are used up
function retry {
  local retries=$1
  shift

  # All bookkeeping is local: this function is exported to the assertion
  # scripts and must not overwrite variables of whoever calls it.
  local count=0
  local exit_code
  local delay
  # errexit is off while the command runs so a failure can be inspected;
  # the RETURN trap switches it back on when the function returns.
  set +e
  trap "set -e" RETURN
  until "$@"; do
    exit_code=$?
    set -e
    delay=$((2 ** count))
    count=$((count + 1))
    if [ "$count" -lt "$retries" ]; then
      echo "Retry $count/$retries exited $exit_code, retrying in $delay seconds..."
      sleep "$delay"
    else
      echo "Retry $count/$retries exited $exit_code, no more retries left."
      return "$exit_code"
    fi
    set +e
  done
  return 0
}
export -f retry

# EXIT handler: deletes the test cluster unless it should be preserved (-p),
# in which case it prints how to resume with the same cluster.
# Globals: PRESERVE, SCRIPTPATH, DELETE_CLUSTER_ARGS, TMP_DIR (all read)
function clean_up {
    if [[ "$PRESERVE" == false ]]; then
        # DELETE_CLUSTER_ARGS is a space-separated flag string (e.g. " -o"). It is
        # left unquoted on purpose so that every flag becomes its own argument and
        # an empty value adds no argument at all. Deletion is best effort.
        # shellcheck disable=SC2086
        "$SCRIPTPATH/../k8s-local-cluster-test/delete-cluster" $DELETE_CLUSTER_ARGS || :
        return
    fi
    echo "To resume test with the same cluster use: \"-c $TMP_DIR\""
}

# Failure handler (installed for INT, TERM and ERR): dumps the logs of the NTH
# pod, reports how long the run took and terminates the script with status 1.
# Globals: START, CLUSTER_NAME (read); END (written)
function exit_and_fail {
    # Both lookups are best effort: a missing pod must not hide the failure report.
    local nth_pod
    nth_pod=$(get_nth_worker_pod || true)
    kubectl logs "$nth_pod" --namespace kube-system || true

    END=$(date +%s)
    local elapsed=$((END - START))
    printf '%s\n' \
        "⏰ Took ${elapsed}sec" \
        "❌ NTH Integration Test FAILED $CLUSTER_NAME! ❌"
    exit 1
}

# Returns the cluster to a clean state between assertion scripts: removes all
# helm releases (default and kube-system namespaces), uncordons every node,
# strips the NTH taints and labels, and deletes the recorded events.
function reset_cluster {
    local taint_key

    echo "Resetting cluster"

    # Release names are word-split on purpose: helm receives one argument per release.
    charts=$(helm ls --all --short)
    # shellcheck disable=SC2086
    [[ -z "$charts" ]] || helm del $charts --no-hooks || :

    system_charts=$(helm ls --all --short --namespace kube-system)
    # shellcheck disable=SC2086
    [[ -z "$system_charts" ]] || helm del $system_charts --no-hooks --namespace kube-system || :

    # First column of `kubectl get nodes`, header line skipped.
    for node in $(kubectl get nodes | tail -n+2 | cut -d' ' -f1); do
        kubectl uncordon "$node"
        # Removing a taint that is not present fails, which is fine here.
        for taint_key in scheduled-maintenance spot-itn asg-lifecycle-termination rebalance-recommendation; do
            kubectl taint node "$node" "aws-node-termination-handler/${taint_key}-" || true
        done
    done

    remove_labels || :
    kubectl delete events --all
    sleep 2
}

# Removes every node label that mentions aws-node-termination-handler from all
# nodes of the cluster.
# Outputs: one "Deleting label ..." line per label/node pair, plus kubectl output
function remove_labels {
    echo "Removing labels from NTH cluster nodes"

    local line label node
    local labels_to_remove=()
    local nodes=()

    # Label keys are taken from the pretty-printed label objects. The same key
    # shows up once per node, so `sort -u` keeps a single copy of each.
    while IFS='' read -r line; do
      if [[ -n "$line" ]]; then
        labels_to_remove+=("$line")
      fi
    done < <(kubectl get nodes -o json | jq '.items[].metadata.labels' | grep 'aws-node-termination-handler' | tr -d '[:blank:]' | tr -d '\"' | cut -d':' -f1 | sort -u)

    if [[ "${#labels_to_remove[@]}" -eq 0 ]]; then
      return 0
    fi

    # The node list does not depend on the label, so it is queried only once.
    while IFS='' read -r line; do
      if [[ -n "$line" ]]; then
        nodes+=("$line")
      fi
    done < <(kubectl get nodes -o json | jq -r '.items[].metadata.name')

    if [[ "${#nodes[@]}" -eq 0 ]]; then
      return 0
    fi

    for label in "${labels_to_remove[@]}"; do
      for node in "${nodes[@]}"; do
        echo "Deleting label $label on node $node"
        # A trailing "-" tells kubectl to remove the label.
        kubectl label node "$node" "$label"-
      done
    done
}

# Prints the name of the oldest running aws-node-termination-handler pod in
# kube-system that was created at or after START_FOR_QUERYING.
# Globals: START_FOR_QUERYING (read)
# Outputs: the pod name, or nothing when no pod qualifies
function get_nth_worker_pod {
    # Normalise a "+0000" offset to "Z" so the value compares textually with
    # the creation timestamps that kubectl prints.
    local since="${START_FOR_QUERYING//+0000/Z}"

    # The threshold is handed to awk with -v instead of being spliced into the
    # program text, so its content can never be parsed as awk code.
    kubectl get pods -n kube-system \
      --selector 'app.kubernetes.io/name=aws-node-termination-handler' \
      --field-selector="status.phase=Running" \
      --sort-by=.metadata.creationTimestamp \
      -o go-template --template '{{range .items}}{{.metadata.name}} {{.metadata.creationTimestamp}}{{"\n"}}{{end}}' \
      | awk -v since="$since" '$2 >= since { print $1 }' | head -1
}

# Help text printed when an option cannot be parsed.
USAGE=$(cat << 'EOM'
  Usage: run-test [-p] [-d] [-o] [-a <ASSERTION_SCRIPT>] [-b <TEST_BASE_NAME>] [-c <CLUSTER_CONTEXT_DIR>] [-n <NTH_DOCKER_IMG>] [-v <K8S_VERSION>] [-w <WEBHOOK_DOCKER_IMG>]
  Executes a test within a provisioned kubernetes cluster with NTH and IMDS pre-loaded.

  Example: run-test -p -n node-termination-handler:customtest

          Optional:
            -a          Assertion script, or a comma-separated list of scripts (default is ALL tests in the e2e dir)
            -b          Base name of test (will be used for cluster too)
            -c          Cluster context directory, if operating on an existing cluster
            -p          Preserve kind k8s cluster for inspection
            -n          Node Termination Handler Docker Image
            -d          use GOPROXY=direct to bypass proxy.golang.org
            -o          Override path w/ your own kubectl and kind binaries
            -v          Kubernetes Version (Default: 1.32) [1.29, 1.30, 1.31 and 1.32]
            -w          Webhook Docker Image

EOM
)

# Process our input arguments
#
# Parses the command-line flags into the global settings declared at the top
# of this file.
# Arguments: the script's own arguments ("$@")
# Globals:   PRESERVE, NODE_TERMINATION_HANDLER_DOCKER_IMG, DOCKER_ARGS,
#            DELETE_CLUSTER_ARGS, PROVISION_CLUSTER_ARGS, OVERRIDE_PATH, TMP_DIR,
#            ASSERTION_SCRIPTS, CLUSTER_NAME_BASE, K8S_VERSION,
#            WEBHOOK_DOCKER_IMG (written); USAGE (read)
# Exits:     with status 1 after printing the usage when an option is invalid
function parse_args {
  local opt
  # getopts keeps its position in OPTIND; a local copy starts every call at
  # the first argument.
  local OPTIND=1
  while getopts "pdn:oc:a:b:v:w:" opt; do
    case ${opt} in
      p ) # PRESERVE K8s Cluster
          echo "❄️  This run will preserve the cluster as you requested"
          PRESERVE=true
        ;;
      n ) # Node Termination Handler Docker Image
          NODE_TERMINATION_HANDLER_DOCKER_IMG="$OPTARG"
        ;;
      d ) # use GOPROXY=direct
          DOCKER_ARGS="--build-arg GOPROXY=direct"
        ;;
      o ) # Override path with your own kubectl and kind binaries
          DELETE_CLUSTER_ARGS="$DELETE_CLUSTER_ARGS -o"
          PROVISION_CLUSTER_ARGS="$PROVISION_CLUSTER_ARGS -o"
          OVERRIDE_PATH=1
        ;;
      c ) # Cluster context directory to operate on existing cluster
          TMP_DIR=$OPTARG
        ;;
      a ) # Assertion script(s); a comma-separated list becomes one script per line
          ASSERTION_SCRIPTS=$(echo "$OPTARG" | tr "," "\n")
        ;;
      b ) # Base cluster name
          CLUSTER_NAME_BASE=$OPTARG
        ;;
      v ) # K8s VERSION
          K8S_VERSION=$OPTARG
        ;;
      w ) # Webhook Docker Image
          WEBHOOK_DOCKER_IMG="$OPTARG"
        ;;
      \? ) # Unknown option or missing option argument: report failure to the caller
          echo "$USAGE" 1>&2
          exit 1
        ;;
    esac
  done
}
parse_args "$@"

# Provision a fresh cluster unless an existing context directory was given with -c.
# provision-cluster prints the context directory it created, which becomes TMP_DIR.
if [[ -z $TMP_DIR ]]; then
    # PROVISION_CLUSTER_ARGS is a space-separated flag string (e.g. " -o"). It is
    # left unquoted on purpose so that every flag becomes its own argument and an
    # empty value adds no argument at all.
    # shellcheck disable=SC2086
    TMP_DIR=$("$SCRIPTPATH/../k8s-local-cluster-test/provision-cluster" -b "$CLUSTER_NAME_BASE" -v "$K8S_VERSION" $PROVISION_CLUSTER_ARGS)
fi
# Binaries in the context directory win by default; with -o the caller's own
# kubectl/kind binaries on PATH take precedence instead.
if [[ $OVERRIDE_PATH -eq 0 ]]; then
  export PATH=$TMP_DIR:$PATH
else
  export PATH=$PATH:$TMP_DIR
fi
CLUSTER_NAME=$(cat "$TMP_DIR/clustername")

## Build and Load Docker Images

# Prints the repository part of an image reference (everything before the tag).
# Only a colon in the last path component starts a tag, so a registry port such
# as localhost:5000/img:tag stays part of the repository.
# Arguments: $1 - image reference
function docker_img_repo {
    local img=$1
    local last_component=${img##*/}
    if [[ "$last_component" == *:* ]]; then
        printf '%s\n' "${img%:*}"
    else
        printf '%s\n' "$img"
    fi
}

# Prints the tag of an image reference, or "latest" (the tag docker assumes)
# when the reference carries none.
# Arguments: $1 - image reference
function docker_img_tag {
    local last_component=${1##*/}
    if [[ "$last_component" == *:* ]]; then
        printf '%s\n' "${last_component##*:}"
    else
        printf '%s\n' "latest"
    fi
}

if [ -z "$NODE_TERMINATION_HANDLER_DOCKER_IMG" ]; then
    echo "🥑 Building the node-termination-handler docker image"
    # DOCKER_ARGS is a flag string that must be word-split into separate arguments.
    # shellcheck disable=SC2086
    docker buildx build --load $DOCKER_ARGS -t "$DEFAULT_NODE_TERMINATION_HANDLER_DOCKER_IMG" "$SCRIPTPATH/../../."
    NODE_TERMINATION_HANDLER_DOCKER_IMG="$DEFAULT_NODE_TERMINATION_HANDLER_DOCKER_IMG"
    echo "👍 Built the node-termination-handler docker image"
else
    echo "🥑 Skipping building the node-termination-handler docker image, since one was specified ($NODE_TERMINATION_HANDLER_DOCKER_IMG)"
fi
# Record the image in the cluster context directory.
echo "$NODE_TERMINATION_HANDLER_DOCKER_IMG" > "$TMP_DIR/nth-docker-img"
NODE_TERMINATION_HANDLER_DOCKER_REPO=$(docker_img_repo "$NODE_TERMINATION_HANDLER_DOCKER_IMG")
NODE_TERMINATION_HANDLER_DOCKER_TAG=$(docker_img_tag "$NODE_TERMINATION_HANDLER_DOCKER_IMG")

if [ -z "$WEBHOOK_DOCKER_IMG" ]; then
    echo "🥑 Building the webhook-test-proxy docker image"
    # shellcheck disable=SC2086
    docker buildx build --load $DOCKER_ARGS -t "$DEFAULT_WEBHOOK_DOCKER_IMG" "$SCRIPTPATH/../webhook-test-proxy/."
    WEBHOOK_DOCKER_IMG="$DEFAULT_WEBHOOK_DOCKER_IMG"
    echo "👍 Built the webhook-test-proxy docker image"
else
    echo "🥑 Skipping building the webhook-test-proxy docker image, since one was specified ($WEBHOOK_DOCKER_IMG)"
fi
echo "$WEBHOOK_DOCKER_IMG" > "$TMP_DIR/webhook-test-proxy-docker-img"
WEBHOOK_DOCKER_REPO=$(docker_img_repo "$WEBHOOK_DOCKER_IMG")
WEBHOOK_DOCKER_TAG=$(docker_img_tag "$WEBHOOK_DOCKER_IMG")

echo "🥑 Loading both images into the cluster"
kind load docker-image --name "$CLUSTER_NAME" "$NODE_TERMINATION_HANDLER_DOCKER_IMG"
kind load docker-image --name "$CLUSTER_NAME" "$WEBHOOK_DOCKER_IMG"
echo "👍 Loaded both images into the cluster"

export KUBECONFIG="$TMP_DIR/kubeconfig"

# From here on a failure prints the NTH logs and the failure banner
# (exit_and_fail), and every exit path runs clean_up.
trap 'exit_and_fail' INT TERM ERR
trap 'clean_up' EXIT

# Write a helper file that can be sourced to work with the cluster by hand.
# $TMP_DIR is expanded now; the escaped \$KUBECONFIG / \$PATH are expanded when
# the file is sourced, so the messages show the values of the sourcing shell.
cat << EOF > "$TMP_DIR/env"
export KUBECONFIG="$TMP_DIR/kubeconfig"
echo "Updated KUBECONFIG=\$KUBECONFIG"

export PATH="$TMP_DIR:\$PATH"
echo "Updated PATH=\$PATH"

EOF

echo "======================================================================================================"
echo "To poke around your test manually:"
echo ". $TMP_DIR/env"
echo "kubectl get pods -A"
echo "======================================================================================================"


### exported vars and funcs that tests can use
# Cluster context and mock-service endpoints.
export TMP_DIR CLUSTER_NAME
export AEMM_URL AEMM_VERSION AEMM_DL_URL
export WEBHOOK_URL
# Helper functions made available to the assertion scripts.
export -f timeout relpath get_nth_worker_pod
# Images under test, whole and split into repository and tag.
export NODE_TERMINATION_HANDLER_DOCKER_IMG NODE_TERMINATION_HANDLER_DOCKER_REPO NODE_TERMINATION_HANDLER_DOCKER_TAG
export WEBHOOK_DOCKER_IMG WEBHOOK_DOCKER_REPO WEBHOOK_DOCKER_TAG
AWS_REGION="us-east-1"
WORKER_IP="192.168.0.1"
# Hostname labels with the dot escaped by a backslash; the worker hostname is
# derived from WORKER_IP with its dots replaced by dashes.
NTH_CONTROL_LABEL="kubernetes\.io/hostname=$CLUSTER_NAME-control-plane"
NTH_WORKER_LABEL="kubernetes\.io/hostname=ip-${WORKER_IP//\./-}.ec2.internal"
export AWS_REGION WORKER_IP NTH_CONTROL_LABEL NTH_WORKER_LABEL
###

## Need to override hostname label for CTH localstack tests
# kubectl gets the label with the backslashes stripped.
kubectl label node "${CLUSTER_NAME}-worker" "${NTH_WORKER_LABEL//\\/}" --overwrite
## Mark worker2 only for Critical Add-Ons like dns
kubectl taint node "${CLUSTER_NAME}-worker2" CriticalAddonsOnly=true:NoSchedule --overwrite

# Tells whether an assertion script is on the deny list.
# Arguments: $1 - script path, in the same form as the SCRIPT_DENYLIST entries
# Globals:   SCRIPT_DENYLIST (read)
# Returns:   0 when the path equals one of the entries, 1 otherwise
function is_denylisted {
    local candidate=$1
    local denied
    # Entries are compared literally: a path is not a regular expression, so
    # characters such as "." must only match themselves. The ${arr[@]+...} form
    # also copes with an empty list under `set -u`.
    for denied in ${SCRIPT_DENYLIST[@]+"${SCRIPT_DENYLIST[@]}"}; do
        if [[ "$denied" == "$candidate" ]]; then
            return 0
        fi
    done
    return 1
}

# Run every assertion script in turn on the same cluster. A failing script
# aborts the run through `set -e` and the ERR trap.
i=0
# $ASSERTION_SCRIPTS is a whitespace-separated list and is split on purpose;
# each resulting entry is quoted from then on so it is never split or
# glob-expanded a second time.
for assert_script in $ASSERTION_SCRIPTS; do
  if is_denylisted "$assert_script"; then continue; fi

  reset_cluster
  # Only pods created from now on belong to this test (see get_nth_worker_pod).
  START_FOR_QUERYING=$(date -u +"%Y-%m-%dT%TZ")
  # Every executed script gets its own IMDS port, counting up from 1338.
  IMDS_PORT=$((i + 1338))
  export IMDS_PORT
  i=$((i + 1))
  echo "======================================================================================================"
  echo "🥑 Running assertion script $(basename "$assert_script")"
  echo "======================================================================================================"
  assert_start=$(date +%s)
  "$assert_script"
  assert_end=$(date +%s)
  echo "⏰ Took $((assert_end - assert_start))sec"
  # Printing the NTH logs is best effort.
  POD_ID=$(get_nth_worker_pod || :)
  kubectl logs "$POD_ID" --namespace kube-system || :
  ## Resets cluster to run another test on the same cluster
  reset_cluster
  echo "✅ Assertion test $assert_script PASSED! ✅"
done

echo "======================================================================================================"
echo "✅ All tests passed! ✅"
echo "======================================================================================================"

